## master preamble ------------------------------------------------------
rm(list=ls(all=TRUE)) 

## inherit directories from stata -----------
args = commandArgs(trailingOnly = "TRUE")
if (length(args)) {
  dir.proj = paste0(args[1],'/')
  dir.lib  = args[2]
} 

## if loading from library -------------------
if(dir.lib!='') {
  ## load depenencies --------------------
  library(R.utils,lib.loc=dir.lib)
  library(ggplot2,lib.loc=dir.lib)
  library(tidyr,lib.loc=dir.lib)
  library(stringr,lib.loc=dir.lib)
  library(forcats,lib.loc=dir.lib)
  library(lubridate,lib.loc=dir.lib)
  library(haven,lib.loc=dir.lib)
  library(data.table,lib.loc=dir.lib)
  library(backports,lib.loc=dir.lib)
  ## load main packages ------------------
  library(rio,lib.loc=dir.lib)
  library(dplyr,lib.loc=dir.lib)
  library(tidyverse,lib.loc=dir.lib)
  library(fixest,lib.loc=dir.lib)
  library(broom,lib.loc=dir.lib)
## otherwise: simple load -----------------
} else {
  library(rio)
  library(dplyr)
  library(tidyverse)
  library(fixest)
  library(broom)
}
## --------------------------------------------------------------------

## file preamble ------------------------------------------------------
## randomization seed --------
set.seed(413)

## directories --------
dir.data = paste0(dir.proj,'data/out/')
dir.out  = paste0(dir.proj,'estimates/')
dir.temp = paste0(dir.proj,'_temp/')

## file-specific -------
name.out = 'exper_b5'

## parameters --------
bootrep  = 100
bin.width = 0.05

## functions ----------------
rudirichlet <- function(n, d) {
  rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d)
  dirichlet.weights <- rexp.mat / rowSums(rexp.mat)
  dirichlet.weights }
## ---------------------------------------------------------------------


## Setup data ------------------------------------------------
data = import(paste0(dir.temp,'data_exper.dta'))

## G = heterogeneity group ------
data = mutate(
  data,
  G=as.numeric(hiexp),Gfe=as.factor(G))
maxG = 1

## generic polynomials in Z -----
data = mutate(
  data,
  Z1=Z^1,Z2=Z^2,Z3=Z^3,Z4=Z^4,Z5=Z^5,Z6=Z^6,Z7=Z^7,Z8=Z^8)

## Setup for clustered bootstrap -----------
list    = unique(select(data,jid))
weights = rudirichlet(nrow(list),bootrep)*10
## ---------------------------------------------------------



## Looping -----------
for(j in 0:bootrep) {
  
  ## Setup bootstrap data --------
  if(j==0) {
    boot.list = list %>% mutate(bootwt = 1)
    boot.data = inner_join(data,boot.list,by='jid') }
  if(j>=1) {
    boot.list = list %>% mutate(bootwt = weights[,j])
    boot.data = inner_join(data,boot.list,by='jid') }
  
  ## combined boot and fe weights ------
  boot.data = mutate(boot.data,brwt=rwt*bootwt)
  
  ## construct instrument bins ---------
  width = bin.width 
  nbin = 1/width 
  boot.data = mutate(
    boot.data,
    bin = as.numeric(as.factor(cut(Z,seq(0,1,width),right=F,include.lowest=T))))
  
  ## loop over outcomes -------
  Y.data = boot.data %>% select(cite_ny1,contest)
  for(k in 1:ncol(Y.data)) {
    
    ## assign outcome -------
    boot.data = mutate(boot.data,Y=Y.data[,k])
    
    ## get group weights -----------
    mu = boot.data %>% group_by(G) %>% summarize(N=sum(brwt)) %>%
      mutate(wt = N/sum(N)) %>% select(c(G,wt))
    
    ## get sample means ------------
    mu = inner_join(
      mu,
      boot.data %>% group_by(G) %>% summarize(
        Y = weighted.mean(Y,brwt),
        p = weighted.mean(D,brwt)),by='G')
    
    ## Adjusted harshness --------
    fit = feols(D~i(G,keep=seq(1,maxG))|totfe,boot.data,weights=~brwt)
    mu = mutate(
      mu,
      padj = as.numeric(c(0,fit$coefficients))+mean(predict(fit,newdata=boot.data,fixef=T)$totfe))
    
    ## Get Y0D0 and Y1D1 ----------
    fit  = feols(Y~0+D*i(G)-D,boot.data,weights=~brwt)
    coef = broom::tidy(fit) %>%
      separate(term,sep='::',into=c('stub1','stub2')) %>%
      separate(stub2,sep=':',into=c('G','D'),convert=T,fill='right')
    mu = inner_join(
      mu, 
      inner_join(
        coef %>% filter(is.na(D)) %>% select(G,y0d0=estimate),
        coef %>% filter(!is.na(D)) %>% select(G,delta=estimate),by='G') %>% mutate(y1d1=y0d0+delta) %>%
        select(G,y0d0,y1d1),by='G')
    
    ## binned regression fit ----------------
    fe.use = c('totfe','Gfe')
    fit = fixest::feols(
      as.formula(glue::glue('Y~i(bin,i.G,ref=1)|',paste(fe.use,collapse='+'))),
      boot.data,weights=~brwt)
    
    ## pull estimated FE -----------
    fe = bind_cols(
      boot.data %>% select(G,bootwt,brwt),
      stats::predict(fit,newdata=boot.data,fixef=T))
    fe.out = fe %>% mutate(sumfe=totfe+Gfe) %>%
      group_by(G) %>% summarize(fe=weighted.mean(sumfe,brwt))
    
    ## Get endpoints ------
    coef = broom::tidy(fit) %>%
      mutate(temp = stringr::str_remove(term,'bin::')) %>%
      separate(temp,sep=':G::',into=c('bin','G'),convert=T) %>% 
      filter(bin==nbin) %>% select(G,sum=estimate)
    
    ## compile stuff (with reformat) --------
    est = mu %>% 
      left_join(fe.out,by='G') %>% left_join(coef,by='G') %>% 
      mutate(
        y0=fe,y1=fe+sum,ate=(y1-y0),att=(Y-y0)/p,atu=(y1-Y)/(1-p),
        y1d0=(1/(1-p))*y1-(p/(1-p))*y1d1,y0d1=(1/p)*y0-((1-p)/p)*y0d0,
        y0d1my0d0=y0d1-y0d0,attmatu=att-atu) %>% select(-c(fe,sum)) %>%
      pivot_longer(!G,names_to='par',values_to='est') %>% as.data.frame() %>%
      mutate(yvar=names(Y.data)[k])
    
    ## add y0 and y1 differences ----------
    ## store under G=1 in estimates table -----
    d0 = filter(est,G==1,par=='y0')$est - filter(est,G==0,par=='y0')$est
    d1 = filter(est,G==1,par=='y1')$est - filter(est,G==0,par=='y1')$est
    est = bind_rows(
      est,
      data.frame(
        G=c(1,1),par=c('d0','d1'),est=c(d0,d1),yvar=names(Y.data[k])))
    
    ## Post output ------------
    if(j==0 & k==1) {
      boot.out = est %>% mutate(iter=j) }
    else { 
      boot.out = bind_rows(boot.out,est %>% mutate(iter=j)) }
    
    ## End of outcome loop ---------
  }
  
  
  ## Progress report -----
  print(paste('Iteration',j,'of',bootrep,'Completed'))
  
}
## End of bootstrap -------------------------



## Bootstrap output: parameters -------
est.main = boot.out %>% filter(iter==0) %>% select(-c(iter))
se.main  = boot.out %>% filter(iter>=1) %>% group_by(yvar,G,par) %>%
  summarize(se=sd(est),lq=quantile(est,0.05),uq=quantile(est,0.95))
est.out = inner_join(est.main,se.main) %>% mutate(lb=est-1.96*se,ub=est+1.96*se) %>%
  select(yvar,G,par,est,se,lq,uq,lb,ub)
export(est.out,paste0(dir.out,name.out,'.csv'))




